#8/12/2015
#replication materials for Spirling "Democratization and Linguistic Complexity", JOP

#Figure 3 plots mean sentence length (words per sentence) and mean syllables
# per word, by session, for cabinet and non-cabinet
#Figure 4 plots the average length of speeches (in words) for cabinet and non-cabinet MPs over time
#Figure 5 is a boxplot comparing Hardie and Balfour (texts as given in the appendix)

# NOTE: this removes every object from the workspace so the script starts from
# a clean slate. Run it in a fresh R session if you have unsaved objects.
rm(list = ls())

# Load the data. "bigframe.rdata" is read from the current working directory;
# the author's original location is kept below for reference.
#setwd("C:/Users/as9934/Dropbox/complexity/August2015/JOP_replication_data/")
loaded.objects <- load("bigframe.rdata")

# Fail early with a clear message. Without these checks a missing column is
# read as NULL by `$` further down, every subset comes back empty, and the
# figures are silently drawn from NaN values.
if (!("big.frame" %in% loaded.objects)) {
  stop("bigframe.rdata does not contain an object named 'big.frame'",
       call. = FALSE)
}

required.cols <- c("year.dummy", "cabinet", "word.count", "sentence.count",
                   "syllable.count")
# `[[` with exact = FALSE resolves names with the same partial matching that
# the `$` accessors used by the rest of the script rely on.
col.missing <- vapply(required.cols, function(col) {
  is.null(big.frame[[col, exact = FALSE]])
}, logical(1))
if (any(col.missing)) {
  stop("big.frame is missing required column(s): ",
       paste(required.cols[col.missing], collapse = ", "),
       call. = FALSE)
}


############################
### Figure 3################
############################

# Distinct values of year.dummy, in order of first appearance in big.frame.
# They index the rows of res.mat and label the x-axis of the figures.
sessions <- as.character(unique(big.frame$year.dummy))

# Returns c(mean words per sentence, mean syllables per word) for the given
# rows of big.frame. Each ratio is formed row by row and then averaged, so
# every row carries equal weight regardless of its length.
row_ratio_means <- function(rows) {
  c(mean(rows$word.count / rows$sentence.count),
    mean(rows$syllable.count / rows$word.count))
}

# One column per session; the four rows are, in order: cabinet sentence
# length, cabinet syllables per word, non-cabinet sentence length, non-cabinet
# syllables per word. vapply() sizes the result up front (no growing inside a
# loop) and yields an empty result rather than iterating over c(1, 0) when
# there are no sessions.
session.means <- vapply(sessions, function(session) {
  dat <- big.frame[big.frame$year.dummy == session, ]
  c(row_ratio_means(dat[dat$cabinet == 1, ]),
    row_ratio_means(dat[dat$cabinet == 0, ]))
}, numeric(4), USE.NAMES = FALSE)

# Results table: one row per session, columns in the order the plotting code
# indexes them (1 = cab.sent, 2 = cab.syll, 3 = non.sent, 4 = non.syll).
res.mat <- data.frame(cab.sent = session.means[1, ],
                      cab.syll = session.means[2, ],
                      non.sent = session.means[3, ],
                      non.syll = session.means[4, ])

# Figure 3 layout: cornsilk background, two panels side by side, and axis
# titles drawn 2 lines out from the axis (mgp).
par(bg = "cornsilk1", mfrow = c(1, 2), mgp = c(2, 1, 0))

# Draws one Figure 3 panel: the cabinet series as a thick solid line and the
# non-cabinet series as a thin dashed line, one x position per session.
#   cab, noncab: numeric vectors with one value per element of `sessions`
#   ylab:        y-axis title
# NOTE(review): the y-limits are taken from `cab` alone (the series passed to
# plot()), so non-cabinet values outside that range are clipped. Left as is so
# the replicated figure is unchanged.
plot_cab_vs_noncab <- function(cab, noncab, ylab) {
  x <- seq_along(sessions)
  plot(x, cab, ylab = ylab, xlab = "", type = "l", lwd = 2, axes = FALSE)
  lines(x, noncab, lwd = 1, lty = 2)
  # Axes are drawn by hand so the x ticks carry the session labels.
  axis(1, at = x, labels = sessions)
  axis(2)
  box()
  legend("bottomleft", lty = c(1, 2), lwd = c(2, 1),
         col = c("black", "black"), legend = c("cab", "noncab"), bty = "n")
}

#plot words per sentence
plot_cab_vs_noncab(res.mat[, 1], res.mat[, 3], ylab = "mean sent length")

#plot syllables per word
plot_cab_vs_noncab(res.mat[, 2], res.mat[, 4], ylab = "mean sylls per word")

###############################
### Figure 4 ##################
###############################

# Open a new graphics device so Figure 4 is not drawn over Figure 3.
# dev.new() opens the default device for the current platform/session, whereas
# X11() is only usable where an X11 display is available.
dev.new()

# Mean word count per row of big.frame, for cabinet and non-cabinet rows
# separately, one value per distinct year.dummy (in order of first appearance).
year.values <- unique(big.frame$year.dummy)

# Preallocate the result vectors instead of growing them with c() in the loop.
wcountcab <- numeric(length(year.values))
wcountnon <- numeric(length(year.values))

for (i in seq_along(year.values)) {
  in.year <- big.frame[big.frame$year.dummy == year.values[i], ]

  wcountcab[i] <- mean(in.year$word.count[in.year$cabinet == 1])
  wcountnon[i] <- mean(in.year$word.count[in.year$cabinet == 0])
}

# Figure 4: mean word count per session, cabinet (pink squares) against
# non-cabinet (green circles).
par(bg = "cornsilk1")

session.idx <- seq_along(sessions)

# NOTE(review): the y-limits come from wcountcab alone (the series passed to
# plot()), so non-cabinet points outside that range are clipped. Left as is so
# the replicated figure is unchanged.
plot(session.idx, wcountcab, axes = FALSE, pch = 22, col = "black", bg = "pink",
     cex = 1.5, ylab = "", xlab = "")
# Axes are drawn by hand so the x ticks carry the session labels.
axis(1, at = session.idx, labels = sessions)
axis(2)
points(session.idx, wcountnon, pch = 21, col = "black", bg = "green", cex = 1.5)
box()
legend("topleft", pch = c(22, 21), col = c("black", "black"),
       pt.bg = c("pink", "green"), legend = c("cabinet", "non-cabinet"),
       pt.cex = c(1.5, 1.5))


###############################
### Figure 5 ##################
###############################
# Open a new graphics device for Figure 5. dev.new() opens the default device
# for the current platform/session, whereas X11() is only usable where an X11
# display is available.
dev.new()

# Scores are hard-coded rather than computed from big.frame: seven values for
# Balfour and eight for Hardie.
balfour <- c(47.72460, 51.63518, 43.67178, 52.70469, 50.90355, 44.95354,
             45.09750)

hardie <- c(64.4283469809544, 63.9055768205566, 50.5082182657076,
            61.3105436303335, 55.1278390113993, 58.8502434353679,
            63.5047437731243, 60.3454720293998)

# Side-by-side boxplots of the two score distributions.
par(bg = "cornsilk1")
boxplot(balfour, hardie, names = c("Balfour", "Hardie"), ylab = "score")


